RDA - Genomic offset predictions

Author

Juliette Archambeau

Published

May 19, 2023

Introduction

Most analyses conducted in this document are based on Capblancq and Forester (2021) and the associated Github repository.

Data

Genomic data

Code
# Population-based allele frequencies
# ===================================
geno <- read.csv(here("data/DryadRepo/ImputedGenomicData_AlleleFrequencies_withoutmaf.csv"),
                     row.names = 1)

# SNP sets
# ========
snp_sets <- readRDS(here("outputs/list_snp_sets.rds"))

Climatic data

Code
# Set of climatic variables
# =========================
clim_var <- readRDS(here("data/ClimaticData/NamesSelectedVariables.rds"))


# Climatic data
# =============
# we scale the past and future climatic data at the location of the populations
# with the parameters (mean and variance) of the past climatic data.
source(here("scripts/functions/generate_scaled_clim_datasets.R"))
clim_dfs <- generate_scaled_clim_datasets(clim_var,clim_ref_adj = FALSE)

Adaptive landscape

Projecting adaptive gradient(s) across space

Adaptively enriched genetic space

The different sets of SNPs (one set of control SNPs and two sets of putative adaptive loci) are used as multivariate response in a new ‘adaptively enriched’ RDA, using the selected climatic variables as explanatory variables.

Code
runRDA <- function(snp_set, clim_var, clim_df, geno){
  
# Keep the genomic data of the selected SNPs
geno <- geno[,snp_set]
  
# RDA formula  
form_rda <- paste("geno ~ ", paste(clim_var,collapse= " + "))
  
# run RDA with only the selected SNPs
rda_outliers <- rda(as.formula(form_rda), clim_df)

}
Code
snp_sets <- lapply(snp_sets, function(x) {
  
x$rda_model <- runRDA(snp_set = x$set_snps,
                      clim_var = clim_var,
                      clim_df = clim_dfs$clim_ref,
                      geno = geno)
return(x)

}) %>% setNames(names(snp_sets))

An RDA biplot can be used to visualize the relationship between the selected SNPs and the underlying climatic variables.

Code
make_RDAbiplot <- function(x) {
  
TAB_loci <- as.data.frame(scores(x$rda_model, choices=c(1:2), display="species", scaling="none"))
TAB_var <- as.data.frame(scores(x$rda_model, choices=c(1:2), display="bp"))

eigenvalues <- summary(eigenvals(x$rda_model, model = "constrained")) %>% as.data.frame()
varexp_axis1 <- eigenvalues[2,1] *100 
varexp_axis2 <- eigenvalues[2,2] *100 


ggplot() +
  geom_hline(yintercept=0, linetype="dashed", color = gray(.80), linewidth=0.6) +
  geom_vline(xintercept=0, linetype="dashed", color = gray(.80), linewidth=0.6) +
  geom_point(data = TAB_loci, aes(x=RDA1*3, y=RDA2*3), colour = "#EB8055FF", size = 2, alpha = 0.8) + #"#F9A242FF"
  geom_segment(data = TAB_var, aes(xend=RDA1, yend=RDA2, x=0, y=0), colour="black", linewidth=0.15, linetype=1, arrow=arrow(length = unit(0.02, "npc"))) +
  geom_text(data = TAB_var, aes(x=1.1*RDA1, y=1.1*RDA2, label = row.names(TAB_var)), size = 3) +
  xlab(paste0("RDA 1 (",round(varexp_axis1,1),"%)")) + 
  ylab(paste0("RDA 2 (",round(varexp_axis2,1),"%)")) +
  facet_wrap(~paste0(x$set_name, " (n=",length(x$set_snps),")")) +
  guides(color=guide_legend(title="Locus type")) +
  theme_bw(base_size = 11) +
  theme(panel.grid = element_blank(), 
        plot.background = element_blank(), 
        panel.background = element_blank(), 
        strip.text = element_text(size=11))

}
Code
rda_biplots <- lapply(snp_sets, make_RDAbiplot)

grid_RDAbiplots <- plot_grid(rda_biplots[[1]],rda_biplots[[2]],rda_biplots[[3]], 
                         nrow=1)
ggsave(here("figs/RDA/RDAbiplots_SNPsets.pdf"), grid_RDAbiplots, width = 14, height=6)

grid_RDAbiplots

Adaptive index across the landscape

From Capblancq and Forester (2021) and the associated Github repository: The scores (loadings) of the climatic variables along the RDA axes can be used to calculate a genetic-based index of adaptation for each pixel of the landscape. This index is estimated independently for each RDA axis of interest using the formula:

\[\sum_{i=1}^{n}a_ib_i\] where \(a\) is the climatic variable score (loading) along the RDA axis, \(b\) is the standardized value for this particular variable at the focal pixel, and \(i\) refers to one of the \(n\) different variables used in the RDA model.

Code
source(here("scripts/functions/project_adaptive_index.R"))

ai_proj <- lapply(snp_sets, function(snp_set){
  
project_adaptive_index(clim_var=clim_var,K=2,snp_set=snp_set)
  
})

The adaptive index thus provides an estimate of adaptive genetic similarity or difference of all pixels on the landscape as a function of the values of the climatic predictors at that location.

We project the adaptive index on maps to visualize the different adaptive gradients across the maritime pine range.

Code
# countries borders for the map
world <- ne_countries(scale = "medium", returnclass = "sf")

ai_maps <- lapply(snp_sets, function(snp_set){
  
RDA_proj <- ai_proj[[snp_set$set_code]]

RDA_proj <- lapply(RDA_proj, function(x) rasterToPoints(x))

# The adaptive index is scaled between 0 and 1
for(i in 1:length(RDA_proj)){
  RDA_proj[[i]][,3] <- (RDA_proj[[i]][,3]-min(RDA_proj[[i]][,3]))/(max(RDA_proj[[i]][,3])-min(RDA_proj[[i]][,3]))
}

# formatting the dataframes for ggplot
TAB_RDA <- as.data.frame(do.call(rbind, RDA_proj[1:2]))
colnames(TAB_RDA)[3] <- "value"
TAB_RDA$variable <- factor(c(rep("RDA1", nrow(RDA_proj[[1]])), rep("RDA2", nrow(RDA_proj[[2]]))), levels = c("RDA1","RDA2"))

# map options
point_size=2
x_limits = c(-10, 15)
y_limits = c(31, 52)
ggtitle <- snp_set$set_name


ggplot(data = TAB_RDA) + 
  geom_sf(data = world, fill="gray98") + 
  scale_x_continuous(limits = x_limits) +
  scale_y_continuous(limits = y_limits) + 
  geom_raster(aes(x = x, y = y, fill = cut(value, breaks=seq(0, 1, length.out=10), include.lowest = T))) + 
  scale_fill_viridis_d(alpha = 0.8, direction = -1, option = "A", labels = c("Negative scores","","","","Intermediate scores","","","","Positive scores")) +
  xlab("Longitude") + ylab("Latitude") +
  guides(fill=guide_legend(title="Adaptive index")) +
  facet_grid(~ variable) +
  ggtitle(ggtitle) +
  theme_bw(base_size = 11) +
  theme(panel.grid = element_blank(), plot.background = element_blank(), panel.background = element_blank(), strip.text = element_text(size=11))
})

pdf(here("figs/RDA/AdaptiveIndex_maps.pdf"), width=10,height=6)
lapply(ai_maps, function(x) x)
dev.off()

# show maps
lapply(ai_maps, function(x) x)

Genomic offset predictions

Using spatial points

Predicting the genomic offset of the studied populations under future climates.

Code
# Function to calculate the RDA genetic offset for spatial points
# ===============================================================

# clim_ref = climatic data used to fit the RDA model (the climate-of-origin of the populations)

# clim_pred = climatic data used for predictions (so either the future climate of the populations, 
            # or climate of the common gardens or the NFI plots)

# weights = if NULL, the adaptive index is not weighted by the relative importance of the RDA axes in 
# explaining the variance.

# K = number of RDA axes used to calculate the genomic offset

# snp_set = list with at least the RDA models 


pred_GO_spatial_points <- function(snp_set, 
                                   K, 
                                   clim_ref, 
                                   clim_pred, 
                                   clim_var,
                                   weights = NULL, 
                                   CG=F){
  
# weights based on the variance explained by the different axes
weights <- (snp_set$rda_model$CCA$eig/sum(snp_set$rda_model$CCA$eig))[1:K]


list_AI <- lapply(list(clim_ref,clim_pred), function(df){
  
  lapply(1:K, function(i){
  
# Below we calculate the adaptive index for the axis i
# For that, we multiply the value of the variables at the location of the population
    # by the loadings of the variables along the axis i
ai <- df %>%
  dplyr::select(any_of(clim_var)) %>% 
  apply(1, function(x) sum(x * snp_set$rda_model$CCA$biplot[,i]))

if(!is.null(weights)){ai <- ai * weights[i]}
    
  }) %>% 
    setNames(paste0("RDA", 1:length(.))) %>% 
    as.data.frame()
  
})

if(CG==F){
  
eucloffset <- unlist(lapply(1:nrow(list_AI[[1]]), function(x) dist(rbind(list_AI[[1]][x,], list_AI[[2]][x,]), method = "euclidean")))

} else if(CG==T){
  
eucloffset <- lapply(1:nrow(list_AI[[2]]), function(x) as.matrix(dist(rbind(list_AI[[2]][x,],list_AI[[1]]), method = "euclidean"))[2:(nrow(list_AI[[1]])+1),1]) %>% 
  setNames(clim_pred[,1] %>% pull()) %>% 
  as.data.frame() %>% 
  cbind(clim_ref[,1])
  
  
  }

return(eucloffset)
}
Code
snp_sets <- lapply(snp_sets, function(x){
  
x$go <- lapply(names(clim_dfs$clim_pred), function(gcm){
  
pred_GO_spatial_points(snp_set = x,
                       clim_var=clim_var,
                       K = 2, # why K=2??
                       clim_ref = clim_dfs$clim_ref,
                       clim_pred = clim_dfs$clim_pred[[gcm]],
                       weights = T)

}) %>% setNames(names(clim_dfs$clim_pred))

return(x)
})

Relationship with Euclidean distance

Code
source(here("scripts/functions/make_eucli_plot.R"))

# Calculate the Euclidean climatic distance
list_dist_env <- clim_dfs$clim_pred %>% lapply(function(clim_pred){
  
Delta = clim_dfs$clim_ref %>% dplyr::select(any_of(clim_var)) - clim_pred %>% dplyr::select(any_of(clim_var)) 
dist_env = sqrt( rowSums(Delta^2) )

})

# Main gene pools (for the figures)
gps <- readRDS(here("data/GenomicData/MainGenePoolPopulations.rds")) %>%  arrange(pop)
Code
par(mfrow=c(1,2))


lapply(snp_sets, function(x) {

lapply(names(list_dist_env), function(gcm){
  
make_eucli_plot(
  X = list_dist_env[[gcm]],
  Y = x$go[[gcm]],
  colors = gps$color_main_gp_pop,
  color_names = gps$main_gp_pop,
  ylab = "GDM genomic offset",
  legend_position="topleft",
  plot_title = paste0(x$set_name," - ", gcm))

})
}) 

Maps

Code
# Function to make the genomic offset maps
source(here("scripts/functions/make_go_map.R"))

# Population coordinates
pop_coord <-  readRDS(here(paste0("data/ClimaticData/MaritimePinePops/ClimatePopulationLocationPointEstimates_ReferencePeriods_noADJ.rds")))[[1]]$ref_means %>% dplyr::select(pop,longitude,latitude)

# Find minimum and maximum values of genomic offset for the maps
go_limits <- lapply(snp_sets, function(x) {
lapply(names(list_dist_env), function(gcm){
x$go[[gcm]]
}) %>%  unlist()
}) %>%  unlist() %>% range()
# The minimum GO value is very very small, almost zero, so we fix it to zero.
go_limits[[1]] <- 0


# Generate the maps for each set of SNPs and each GCM
lapply(snp_sets, function(x) {

go_maps <- lapply(names(list_dist_env), function(gcm){
  
make_go_map(dfcoord=pop_coord,
            snp_set = x,
            gcm=gcm,
            ggtitle=gcm,
            go_limits = go_limits,
            point_size = 3)

})

legend_maps  <- get_legend(go_maps[[1]])

go_maps <- lapply(go_maps, function(y) y + theme(legend.position = "none"))

go_maps$legend_maps <- legend_maps

go_maps <-plot_grid(plotlist=go_maps)

title <- ggdraw() + 
  draw_label(
    x$set_name,
    fontface = 'bold',
    x = 0,
    hjust = 0
  ) +
  theme(plot.margin = margin(0, 0, 0, 7))

# merge title and plots
plot_grid(
  title, go_maps,
  ncol = 1,
  rel_heights = c(0.1, 1))

  })

Comparing GO predictions

We look at the correlation across the different genomic offset predictions at the location of the populations, i.e. those based on all SNPs and those based on sets of candidates or control SNPs.

Code
par(mfrow=c(1,3))

lapply(names(snp_sets[[1]]$go), function(gcm){
  
lapply(snp_sets, function(x) x$go[[gcm]]) %>% 
  as_tibble() %>%
  cor() %>% 
  corrplot(method = 'number',type = 'lower', 
           diag = FALSE,mar=c(0,0,2,0),
           title=gcm,
           number.cex=2,tl.cex=1.5)
  
})

Using rasters

Code
# Function to calculate the RDA genetic offset using rasters
# ==========================================================

# Arguments
# =========
# snp_set = list with at least the RDA models 
# clim_var = selected climatic variables
# K = number of RDA axes used to calculate the genomic offset
# gcm = Global Climate Model of the raster with future climatic data
# clim_ref_adj = TRUE or FALSE, specify whether the point estimate climatic data used to scale the rasters should be adjusted or not for elevation
# ref_period = the reference period used to calculate the adaptive index, can be 1901-1950 or 1960-1991
# method = `loadings` or `predict` 



pred_GO_rasters <- function(snp_set, 
                                   clim_var,
                                   K, 
                                   gcm,
                                   range_buffer = shapefile(here('data/Mapping/PinpinDistriEUforgen_NFIplotsBuffer10km.shp')),
                                   method = "loadings",
                                   clim_ref_adj = FALSE,
                                   ref_period = "ref_1901_1950"){
  
# Load point estimate climatic data of the reference period
if(clim_ref_adj==TRUE){adj <- "ADJ"} else {adj <- "noADJ"}  
clim_ref_pe <- readRDS(here(paste0("data/ClimaticData/MaritimePinePops/ClimatePopulationLocationPointEstimates_ReferencePeriods_",adj,".rds")))[[ref_period]]
  
# Extract scaling parameters, i.e. mean and variance  
scale_params <- lapply(clim_var, function(x){
    
    vec_var <- clim_ref_pe$ref_means[,x] %>% pull()
    
    list(mean = mean(vec_var), sd = sd(vec_var))
    
}) %>% setNames(clim_var)
  
  
# Rasters with climatic data for the reference period
path <- here(paste0("data/ClimaticData/ClimateDTRasters/1km_",clim_ref_pe$range[[1]],"-",clim_ref_pe$range[[2]],"_Extent-JulietteA/"))
tif_paths <- lapply(clim_var, function(x) paste0(path,"/",x,".tif"))
ref_rasts <- raster::stack(tif_paths)
  
# Raster with future climatic data
path <- here(paste0("data/ClimaticData/ClimateDTRasters/1km_",gcm,"_2041-2070_ssp370_Extent-JulietteA/"))
tif_paths <- lapply(clim_var, function(x) paste0(path,"/",x,".tif"))
fut_rasts <- raster::stack(tif_paths)

# checking that the CRS is the same for the buffer and the rasters
if(identical(crs(range_buffer),crs(ref_rasts))==FALSE){
  stop(paste0("CRS of the range buffer is not the same as the CRS of the reference period raster."))} 
if(identical(crs(range_buffer),crs(fut_rasts))==FALSE){
  stop(paste0("CRS of the range buffer is not the same as the CRS of future climate rasters."))} 

# Mask with the range if supplied
if(!is.null(range_buffer)){
  ref_rasts <- mask(ref_rasts, range_buffer)
  fut_rasts <- mask(fut_rasts, range_buffer)}


# extract coordinates and climatic values, and mean-center the climatic data
ref_vals <- as.data.frame(rasterToPoints(ref_rasts))
for(i in clim_var){ref_vals[,i] <- (ref_vals[,i] - scale_params[[i]]$mean) / scale_params[[i]]$sd}
fut_vals <- as.data.frame(rasterToPoints(fut_rasts))
for(i in clim_var){fut_vals[,i] <- (fut_vals[,i] - scale_params[[i]]$mean) / scale_params[[i]]$sd}


# Predict genomic offset for each pixel based on the loadings of the climatic variables
if(method == "loadings"){
  
    ref_proj <- list()
    fut_proj <- list()
    go_proj <- list()
  
    # Projection for each RDA axis
    for(i in 1:K){
      
      # Calculate adaptive index for the reference period
      ref_rast <- ref_rasts[[1]]
      ref_rast[!is.na(ref_rast)] <- as.vector(apply(ref_vals[,clim_var], 1, function(x) sum( x * snp_set$rda_model$CCA$biplot[,i])))
      names(ref_rast) <- paste0("RDA_ref_", as.character(i))
      ref_proj[[i]] <- ref_rast
      names(ref_proj)[i] <- paste0("RDA", as.character(i))
      
      # Calculate adaptive index under future climates
      fut_rast <- fut_rasts[[1]]
      fut_rast[!is.na(fut_rast)] <- as.vector(apply(fut_vals[,clim_var], 1, function(x) sum( x * snp_set$rda_model$CCA$biplot[,i])))
      names(fut_rast) <- paste0("RDA_fut_", as.character(i))
      fut_proj[[i]] <- fut_rast
      names(fut_proj)[i] <- paste0("RDA", as.character(i))
      
      # Calculate genetic offset based on a single RDA axis
      go_proj[[i]] <- abs(ref_proj[[i]] - fut_proj[[i]])
      names(go_proj)[i] <- paste0("RDA", as.character(i))
    }
}


  
# Predict genomic offset for each pixel based on predict.RDA
  if(method == "predict"){
    
    # Prediction with the RDA model under reference-period climates and future climates
    ref_pred <- predict(snp_set$rda_model, ref_vals[,-c(1,2)], type = "lc")
    fut_pred <- predict(snp_set$rda_model, fut_vals[,-c(1,2)], type = "lc")
    
    ref_proj <- list()
    fut_proj <- list()
    go_proj <- list()
    
    for(i in 1:K){
      
      # Calculate adaptive index for the reference period
      ref_rast <- rasterFromXYZ(data.frame(ref_vals[,c(1,2)], Z = as.vector(ref_pred[,i])), crs = crs(ref_rasts))
      names(ref_rast) <- paste0("RDA_ref_", as.character(i))
      ref_proj[[i]] <- ref_rast
      names(ref_proj)[i] <- paste0("RDA", as.character(i))
      
      # Calculate adaptive index under future climates
      fut_rast <- rasterFromXYZ(data.frame(ref_vals[,c(1,2)], Z = as.vector(fut_pred[,i])), crs = crs(ref_rasts))
      names(fut_rast) <- paste0("RDA_fut_", as.character(i))
      fut_proj[[i]] <- fut_rast
      names(fut_proj)[i] <- paste0("RDA", as.character(i))
      
      # Calculate genetic offset based on a single RDA axis
      go_proj[[i]] <- abs(ref_proj[[i]] - fut_proj[[i]])
      names(go_proj)[i] <- paste0("RDA", as.character(i))
    }
  }

  # Weights based on axis eigen values
  weights <- snp_set$rda_model$CCA$eig/sum(snp_set$rda_model$CCA$eig)
  
  # Weight the current and future adaptive indices based on the eigen values of the associated axes
  ref_proj_weighted <- do.call(cbind, lapply(1:K, function(x) rasterToPoints(ref_proj[[x]])[,-c(1,2)]))
  ref_proj_weighted <- as.data.frame(do.call(cbind, lapply(1:K, function(x) ref_proj_weighted[,x]*weights[x])))
  fut_proj_weighted <- do.call(cbind, lapply(1:K, function(x) rasterToPoints(fut_proj[[x]])[,-c(1,2)]))
  fut_proj_weighted <- as.data.frame(do.call(cbind, lapply(1:K, function(x) fut_proj_weighted[,x]*weights[x])))
  
  # Predict a global genetic offset, incorporating the K first axes weighted by their eigen values
  go_global_proj <- go_proj[[1]]
  go_global_proj[!is.na(go_global_proj)] <- unlist(lapply(1:nrow(ref_proj_weighted), function(x) dist(rbind(ref_proj_weighted[x,], fut_proj_weighted[x,]), method = "euclidean")))
  names(go_global_proj) <- "global_offset"
  
  # Return projections for current and future climates for each RDA axis, prediction of genetic offset for each RDA axis and a global genetic offset 
  return(list(ref_proj = ref_proj, fut_proj = fut_proj, go_proj = go_proj, go_global_proj = go_global_proj, weights = weights[1:K]))
}
Code
# For each snp set and each GCM, we generate a list of rasters with:
  # the projection of AI for reference_period and future climates
  # genomic offset predictions for each RDA axis 
  # genomic offset predictions incorporating the K first axes weighted by their eigen values
go_rasters <- snp_sets %>% lapply(function(snp_set){
  lapply(names(clim_dfs$clim_pred), function(gcm){
  
  pred_GO_rasters(snp_set,clim_var,K=2,gcm)}) %>% 
    setNames(names(clim_dfs$clim_pred))
  
  })

go_rasters %>% saveRDS(file=here("outputs/RDA/go_rasters.rds"))
Code
go_rasters <- readRDS(file=here("outputs/RDA/go_rasters.rds"))

# Map options
# ===========
point_size=2
x_limits = c(-10, 15)
y_limits = c(31, 52)

# get maximum and minimum genomic offset values across the different GO predictions (for each snp set and each GCM)
# so that comparing the maps will be easier
go_limits <- names(snp_sets) %>% lapply(function(x){
  
lapply(names(clim_dfs$clim_pred), function(gcm){
  
rasterToPoints(go_rasters[[x]][[gcm]]$go_global_proj) %>% as_tibble() %>% pull(global_offset)

}) %>% 
    unlist()
  
}) %>% 
  unlist() %>% 
  range()

# The minimum GO value is very very small, almost zero, so we fix it to zero.
go_limits[[1]] <- 0

# Mapping
# =======
go_maps <- names(snp_sets) %>% lapply(function(x){
  
go_maps <- lapply(names(clim_dfs$clim_pred), function(gcm){
  
# extract genomic offset values 
go_dfs <- rasterToPoints(go_rasters[[x]][[gcm]]$go_global_proj) %>% as_tibble()


ggplot(data = go_dfs) + 
  geom_sf(data = world, fill="gray98") + 
  scale_x_continuous(limits = x_limits) +
  scale_y_continuous(limits = y_limits) + 
  geom_raster(aes(x = x, y = y, fill = global_offset), alpha = 1) + 
  scale_fill_gradient2(low="blue", mid= "yellow", high="red", 
                       midpoint=(max(go_limits[[2]])-min(go_limits[[1]]))/2,
                       limits=go_limits,
                       name = "Genomic offset") +
  xlab("Longitude") + ylab("Latitude") +
  ggtitle(gcm) +
  theme_bw(base_size = 11) +
  theme(panel.grid = element_blank(), 
        plot.background = element_blank(), 
        panel.background = element_blank(), 
        #legend.position = c(0.85,0.15),
        legend.box.background = element_rect(colour = "gray80"),
        strip.text = element_text(size=11))

})

legend_maps  <- get_legend(go_maps[[1]])

go_maps <- lapply(go_maps, function(y) y + theme(legend.position = "none"))

go_maps$legend_maps <- legend_maps

go_maps <-plot_grid(plotlist=go_maps)

title <- ggdraw() + 
  draw_label(
    snp_sets[[x]]$set_name,
    fontface = 'bold',
    x = 0,
    hjust = 0) +
  theme(plot.margin = margin(0, 0, 0, 7))

# merge title and plots
plot_grid(title, go_maps, ncol = 1,rel_heights = c(0.1, 1))
  
})

# export maps pdf
pdf(here("figs/RDA/GOmaps.pdf"), width=14,height=9.5)
lapply(go_maps, function(x) x)
dev.off()

# show maps
lapply(go_maps, function(x) x)

Corr btw predictions with rasters or spatial points

We check that the genomic offset predictions we obtained with the spatial points are highly correlated (ie similar) to those obtained with the rasters.

Comment When we extract the genomic offset values from the rasters at the location of the populations, we have missing values for some populations. Indeed, some populations are not included in the buffer of the species range that I used, based on the EUFORGEN distribution and 10km around the NFI plots. We may want to modify the buffer of the species range to include those populations!

Code
# checking correlations
names(snp_sets) %>% lapply(function(x){
  
cor_go <- lapply(names(clim_dfs$clim_pred), function(gcm){
  
go_rast <- raster::extract(go_rasters[[x]][[gcm]]$go_global_proj, pop_coord[,c("longitude","latitude")])

cor(snp_sets[[x]]$go[[gcm]],go_rast,use= "complete.obs")
  
}) %>% unlist() 
  
tibble(gcm=names(clim_dfs$clim_pred),
       cor_go=cor_go)
  
}) %>% 
  setNames(names(snp_sets)) %>% 
  bind_rows(.id="snp_set") %>% 
  kable_mydf()
snp_set gcm cor_go
cand GFDL-ESM4 0.99
cand IPSL-CM6A-LR 1.00
cand MPI-ESM1-2-HR 1.00
cand MRI-ESM2-0 1.00
cand UKESM1-0-LL 1.00
cand_corrected GFDL-ESM4 0.99
cand_corrected IPSL-CM6A-LR 1.00
cand_corrected MPI-ESM1-2-HR 1.00
cand_corrected MRI-ESM2-0 1.00
cand_corrected UKESM1-0-LL 1.00
control GFDL-ESM4 1.00
control IPSL-CM6A-LR 1.00
control MPI-ESM1-2-HR 1.00
control MRI-ESM2-0 1.00
control UKESM1-0-LL 1.00

This is ok!

Validation - NFI plots

Code
# Load the climatic data of the NFI plots.
nfi_clim <- readRDS(here("data/ClimaticData/NFIplots/NFIclimate.rds"))
  
# Keep only the climatic variables of interest and scale the climatic data
nfi_dfs <- generate_scaled_clim_datasets(clim_var, clim_ref = nfi_clim$clim_ref, clim_pred = nfi_clim$clim_survey)

# calculate the genomic offset for the NFI plots
snp_sets <- lapply(snp_sets, function(x){
  
x$go_nfi <- pred_GO_spatial_points(snp_set = x,
                                   K = 2, # why K=2??
                                   clim_var=clim_var,
                                   clim_ref = nfi_dfs$clim_ref,
                                   clim_pred = nfi_dfs$clim_pred,
                                   weights = T)
return(x)
})

# checking missing data
# lapply(snp_sets, function(x) sum(is.na(x$go_nfi)))

# Find minimum and maximum values of genomic offset for the maps
go_limits <- lapply(snp_sets, function(snp_set) snp_set$go_nfi) %>%  unlist() %>% range()
# The minimum GO value is very very small, almost zero, so we fix it to zero.
go_limits[[1]] <- 0


# map genomic offset predictions in the NFI plots 
lapply(snp_sets, function(x) make_go_map(
  dfcoord= readRDS(here("data/ClimaticData/NFIplots/NFIclimate.rds"))[[1]] %>% dplyr::select(contains("ude")), 
  snp_set = x,
  type="NFI",
  point_size = 0.5,
  go_limits = go_limits,
  legend_position = c(0.85,0.15),
  legend_box_background = "gray80",
  y_limits = c(35, 51)))

We look at the correlation across the different genomic offset predictions in the NFI plots, i.e. those based on all SNPs and those based on sets of candidates or control SNPs.

Code
lapply(snp_sets, function(x) x$go_nfi) %>% 
  as_tibble() %>%
  cor() %>% 
  corrplot(method = 'number',type = 'lower', diag = FALSE,mar=c(0,0,2,0),
               number.cex=2,tl.cex=1.5)

Validation - Common gardens

Code
cg_clim <- readRDS(here("data/ClimaticData/CommonGardens/ClimateCG.rds")) %>%  dplyr::select(cg,any_of(clim_var))
cg_coord <- readRDS(here("data/ClimaticData/CommonGardens/ClimateCG.rds")) %>% dplyr::select(cg,contains("ude"))
cg_names <- unique(cg_coord$cg)

# Generate scaled climatic datasets with climatic data at the location of the populations and at the location of the common gardens
cg_dfs <- generate_scaled_clim_datasets(clim_var, clim_pred = cg_clim)
  
# Predict genomic offset of each population when transplanted in the climate of the common gardens
snp_sets <- lapply(snp_sets, function(x){
  
x$go_cg <- pred_GO_spatial_points(snp_set = x,
                                  K = 2, # why K=2??
                                  clim_var = clim_var,
                                  clim_ref = cg_dfs$clim_ref,
                                  clim_pred = cg_dfs$clim_pred,
                                  weights = T,
                                  CG=T) %>% as_tibble()
return(x)
})

# Map genomic offset predictions at the locations of the populations
go_maps_cg <- lapply(cg_names, function(cg_name){

# Find minimum and maximum values of genomic offset for the maps
go_limits <- lapply(snp_sets, function(snp_set) snp_set$go_cg[[cg_name]]) %>%  unlist() %>% range()
go_limits[[1]] <- 0

p <- lapply(snp_sets, function(x) make_go_map(dfcoord=pop_coord, 
                                              snp_set = x,
                                              point_size = 3,
                                              type="CG",
                                              go_limits = go_limits,
                                              cg_name=cg_name,
                                              cg_coord=cg_coord))

plot_grid(p[[1]],p[[2]],p[[3]],nrow=1)
  
}) %>% setNames(cg_names)

pdf(here("figs/RDA/GOmaps_CGs.pdf"), width=15,height=6)
lapply(go_maps_cg, function(x) x)
dev.off()

# show maps
lapply(go_maps_cg, function(x) x)

We look at the correlation across the different genomic offset predictions in the common gardens, i.e. those based on all SNPs and those based on sets of candidates or control SNPs.

Code
par(mfrow=c(1,3))

lapply(cg_names, function(cg_name){

lapply(snp_sets, function(x) x$go_cg) %>% 

lapply(function(set){
  set[[cg_name]]
}) %>% 
    as_tibble() %>% 
    cor() %>% 
    corrplot(method = 'number',type = 'lower', 
             diag = FALSE,mar=c(0,0,2,0),
             title=str_to_title(cg_name),
             number.cex=2,tl.cex=1.5)

})

Let’s save the genomic offset predictions for comparison with the other methods.

Code
snp_sets %>% saveRDS(file=here("outputs/RDA/go_predictions.rds"))

References

Capblancq, Thibaut, and Brenna R Forester. 2021. “Redundancy Analysis (RDA): A Swiss Army Knife for Landscape Genomics.” Methods in Ecology and Evolution.